Endowment Time Series

# load companies file of EIN to name and endowment data

companies_to_ein <- read_csv(here("data", "companies.csv")) %>%
  mutate(EIN = as.character(ein)) %>%
  select(EIN, organization_name)


endowment_data <- read_rds(here("data", 
                                "endowments_by_most_recent_filings.RDS")) %>%
  select(-c(EndowmentsHeldUnrelatedOrgInd, EndowmentsHeldRelatedOrgInd)) %>%
  pivot_longer(-c(EIN, fiscal_year),
               names_to = "variable_name") %>%
  left_join(companies_to_ein) %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year)))
# extract return dates
source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"),
              full.names = TRUE)


dates <- map_df(files,
                ~get_df(filename = .x, 
                        variables = c("//Return//ReturnHeader//TaxPeriodEndDt"))) %>%
  mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
   filter_ein()

saveRDS(dates, here('data', 'dates.RDS')) 
dates <- readRDS( here('data', 'dates.RDS')) %>%
  select(EIN, TaxPeriodEndDt, fiscal_year) 
 

endowment_data <- endowment_data %>%
  mutate(fiscal_year=as.numeric(paste(fiscal_year))) %>%
  left_join(dates)
# function to plot variables of interest against each other
plot_ranks <- function(var1, var2, data) {

  
   plt <- data %>%
    group_by(fiscal_year) %>%
   # arrange(var1) %>%
    mutate("{var1}_rank" := rank(!!sym(var1))) %>%
#    arrange(var2) %>%
    mutate("{var2}_rank"  := rank(!!sym(var2))) %>%
    ggplot(aes(x = !!sym(glue("{var1}_rank" )), y =!!sym(glue("{var2}_rank" )),
               color  = organization_name,
               label =EIN
               )) +
    geom_point() +
    geom_function(fun=function(x)x,color="darkred", alpha = .8) +
    labs(x = paste0(var1, " Rank"),
         y =  paste0(var2, " Rank")) +
    theme_bw() +
    labs(title = glue("Rank of {var2} vs. Rank of {var1}")) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
     facet_wrap(~fiscal_year)+
      theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) 
  
  ggplotly(plt, margin = m, height = 550)

}

# function to plot variables of interest against each other
plot_combo <- function(var1, var2, data) {
  
  data %>%
    ggplot(aes(x = !!sym(var1), y = !!sym(var2), color = EIN)) +
    geom_point(alpha = .9) +
   # geom_line(alpha = .5) +
    facet_wrap(~fiscal_year) +
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme_bw()+
      theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
            legend.position = "none",
            axis.text.x = element_text(angle = 60, vjust = .6)) +
    scale_x_continuous(labels=comma) +
    scale_y_continuous(labels=comma) +
    labs(title = paste0(var2, " vs. ", var1),
         subtitle = "Fill by EIN")
  
}


endowment_data_wide <- endowment_data %>% 
  pivot_wider(names_from=variable_name,
              values_from=value) 

Plotting Endowment Variables Against Each Other, By Year

vars <-  unique(endowment_data$variable_name)[!grepl("EOY|Admin|Grants", unique(endowment_data$variable_name))]

# pairwise combinations of variables
variable_combinations <- t(combn(vars, 2)) %>%
  as.data.frame()

if (!all_plots) variable_combinations <- variable_combinations[1:4,]
cat('## Scale of Original Variables {.tabset}   \n\n')

Scale of Original Variables

pwalk(variable_combinations, ~{ 
 cat('### ',paste0(.x, ", ", .y),'\n\n')
 plt <- plot_combo(var1 = .x, var2 = .y, data = endowment_data_wide)
 print(plt)
  cat('\n\n')
 }
)

BeginningYearBalanceAmt, ContributionsAmt

BeginningYearBalanceAmt, InvestmentEarningsOrLossesAmt

BeginningYearBalanceAmt, OtherExpendituresAmt

BeginningYearBalanceAmt, EndYearBalanceAmt

By Rank

plotlist <- pmap(variable_combinations, ~{ 
 plt <- plot_ranks(var1 = .x, var2 = .y, data = endowment_data_wide)
 }
)


htmltools::tagList(setNames(plotlist, NULL))

Compensation

m <- list(
    l = 50,
    r = 50,
    b = 50,
    t = 150,
    pad = 0.5
)



source(here("GET_VARS.R"))

files <- dir(here("ballet_990_released_20230208"),
              full.names = TRUE)



##################################
# EMPLOYEE INFORMATION
##################################


employee_comp_vars <- c(
  "/Return/ReturnData/IRS990/TotalEmployeeCnt",
  "/Return/ReturnData/IRS990/EmployeeCnt",
  "/Return/ReturnData/IRS990/CYSalariesCompEmpBnftPaidAmt",
  "/Return/ReturnData/IRS990/CompCurrentOfcrDirectorsGrp/TotalAmt")


employees <- map_df(files, ~get_df(filename = .x, 
                              variables=employee_comp_vars))

employees %>%
  select(-filename) %>%
  mutate(across(-c(ReturnTs,EIN, fiscal_year), 
                as.numeric)) %>%
  rename(OffDirCompAmt =TotalAmt ) %>%
  saveRDS(here("data", "employees.RDS"))




#################
# SCHEDULE J
#################
comp <- map_df(files, ~get_df(filename = .x, schedule = "j"))


comp_clean <- comp %>%
  rename_with(.cols= everything(),
              ~gsub('/Return/ReturnData/IRS990ScheduleJ/', '', .)) %>% 
  select(-contains("Ind")) %>%
  select(fiscal_year, EIN,
         contains("RltdOrgOfficerTrstKeyEmplGrp")) %>%
  # only extract cols within the RltdOrgOfficerTrstKeyEmplGrp
  select(EIN, fiscal_year,
         matches("RltdOrgOfficerTrstKeyEmplGrp\\[.*.\\]/")) %>%
  pivot_longer(-c(EIN,fiscal_year)) %>%
  mutate(id = gsub("\\D", "", name),
       #  name_old = name,
          name = gsub(".*./", "", name),
         id = gsub("990", "", id))

  
comp_clean <- comp_clean %>%
  filter(!is.na(value)) %>%
  distinct() %>% 
  pivot_wider(names_from = name, values_from = value) 



comp_clean <- comp_clean %>%
  mutate(across(contains("Amt"), as.numeric))%>%
  mutate(TitleTxt=tolower(TitleTxt))
  
saveRDS(comp_clean, here("data", "schedj.RDS"))
comp_clean <- read_rds(here("data", "schedj.RDS"))%>%
  left_join(companies_to_ein) %>%
  mutate(fiscal_year = as.numeric(paste(fiscal_year)))

employees <- readRDS(here("data", "employees.RDS")) %>%
    mutate(fiscal_year = as.numeric(paste(fiscal_year)))
# clean up title text field because it was free text in the form 990
comp_clean <- comp_clean %>% 
  mutate(TitleTxt = gsub("dancer/choreographer",
                          "dancer / choreographer",
                         TitleTxt),
         TitleTxt = gsub("vp", "Vice President", TitleTxt),
         TitleTxt = gsub("dorector", "director",TitleTxt),
  title_clean = case_when(
    grepl("ceo", TitleTxt, ignore.case = TRUE ) ~"CEO",
    grepl("cfo", TitleTxt, ignore.case = TRUE)~ "Chief Financial Officer",
    grepl("executive dir", TitleTxt, ignore.case = TRUE) ~"Executive Director",
    grepl("artistic dir",TitleTxt,  ignore.case = TRUE) ~"Artistic Director",
    grepl("emeritus|emerita", TitleTxt, ignore.case = TRUE) ~"Emirita/Emiritus Position",
    grepl( "chief dev",TitleTxt, ignore.case=TRUE) &  
    grepl("officer",TitleTxt,  ignore.case = TRUE) ~"Chief Development Officer",
     grepl("director of market|marketing director",TitleTxt,  ignore.case = TRUE) ~ "Director of Marketing",
    grepl("music director",TitleTxt,  ignore.case = TRUE) ~"Music Director",
    grepl("mktg", TitleTxt, ignore.case = TRUE ) & 
      grepl("officer|ofc", TitleTxt, ignore.case = TRUE ) ~ "Marketing Officer",
    grepl("Director of Development",TitleTxt, ignore.case = TRUE) ~ "Director of Development",
    grepl("chief",TitleTxt, ignore.case = TRUE) & 
      grepl("officer",TitleTxt, ignore.case = TRUE) ~ "Other Chief Officer",
    grepl("Dir of Legal",TitleTxt, ignore.case = TRUE) ~"Director of Legal Affairs",
    grepl("Former Senior Dir", TitleTxt, ignore.case = TRUE) ~ "Former Senior Director",
    grepl("Director|Dir", TitleTxt, ignore.case = TRUE) ~ "Other Director",
    grepl("Director", TitleTxt, ignore.case = TRUE) ~ "Other Director",
    TRUE ~ TitleTxt
  )) 

Number of EINs with Each Title

# number of EINs with each type of title
comp_clean %>%
  group_by(title_clean) %>%
  summarize(`Number of EINs` = n_distinct(EIN)) %>% 
  arrange(desc(`Number of EINs`))

Number of Individuals with Title

# number of individuals with title
comp_clean %>%
  mutate(title_clean=tolower(title_clean)) %>%
  filter(!is.na(title_clean)) %>%
  group_by(title_clean) %>%
  summarize(`Number of Individuals in Position` = n()) %>% 
  arrange(desc(`Number of Individuals in Position`))
# missingness by variable
# comp_clean %>%
#   select(-c(EIN,fiscal_year,id)) %>%
#   is.na() %>% 
#   colSums() %>%
#   as_tibble(rownames="Variable") %>%
#   mutate(`Not Missing` = nrow(comp_clean) - value) %>%
#   select(-value)

Compensation by Title

Base Compensation

comp_clean %>% 
  group_by(title_clean) %>%
  mutate(m = median(BaseCompensationFilingOrgAmt, na.rm= TRUE)) %>%
  filter(!is.na(title_clean)) %>%
  ungroup() %>%
  ggplot(aes(x=fct_reorder(title_clean,m),
             y = BaseCompensationFilingOrgAmt)) +
  geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
  coord_flip() +
  theme_bw() +
  labs(title = "Base Compensation by Title",
       x = "Title")+
  theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
         axis.text.x= element_text(size = 8))

Total Compensation

comp_clean %>% 
  group_by(title_clean) %>%
  mutate(m = median(TotalCompensationFilingOrgAmt, na.rm= TRUE)) %>%
  filter(!is.na(title_clean)) %>%
  ungroup() %>%
  ggplot(aes(x=fct_reorder(title_clean,m),
             y = TotalCompensationFilingOrgAmt)) +
  geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
  coord_flip() +
  theme_bw() +
  labs(title = "Total Compensation by Title",
       x = "Title")+
  theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
         axis.text.x= element_text(size = 8))

Compensation by Year

plt <- comp_clean %>%
  group_by(EIN, fiscal_year) %>%
  summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) %>%
  group_by(EIN) %>%
  mutate(m = median(total_compensation, na.rm= TRUE)) %>%
  ungroup() %>%
#  group_by(EIN) %>%
  mutate(tile = ntile(m,2),
         tilename = ifelse(tile == 1,
                           "EINs Below the Median",
                           "EINs Above the Median"),
         tilename = factor(tilename, levels = c( "EINs Below the Median",
                                                  "EINs Above the Median"))) %>%
  ggplot(aes(x=fiscal_year,
             y = total_compensation,
            color = EIN,
            group = EIN)) +
  geom_line() +
  geom_point() +
  labs(title = "Compensation to Highest Paid Employees",
       subtitle = "Total Base Compensation to Highest Paid Employees By EIN",
       y = "Total Compensation",
       x = "Fiscal Year")+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          axis.text.x = element_text(size =8, vjust = .6, angle = 60)) +
  facet_wrap(~tilename, scales = "free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_continuous(labels = comma)


ggplotly(plt,height = 500, width =850) %>%
  layout(margin = m)
# plot compensation versus beginning year balance by fiscal year
comp <- comp_clean %>% 
  mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
  left_join(endowment_data_wide)  %>%
  group_by(EIN, fiscal_year, BeginningYearBalanceAmt, organization_name) %>%
  summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) 
  
  
plt <- comp %>%
  ggplot(aes(x=BeginningYearBalanceAmt,
             y = total_compensation,
             label = organization_name,
            color = EIN)) +
  geom_point() +
  facet_wrap(~fiscal_year, nrow = 2)+
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"))+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "magma",
                                 end = .9) +
  labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
      x = "Beginning of Year Balance",
      y = "Total Compensation")

ggplotly(plt, height = 500, width = 850) %>%
  layout(margin = m)
# logged scales
plt <- comp %>%
  ggplot(aes(x=BeginningYearBalanceAmt,
             y = total_compensation,
            color = EIN)) +
  geom_point() +
  facet_wrap(~fiscal_year, nrow = 2)+
    theme_bw()+
    theme(plot.title = element_text(size = 18, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 16),
            axis.title = element_text(size = 13, 
                                      face = "bold"))+
    viridis::scale_color_viridis(discrete=TRUE,
                                 option = "magma",
                                 end = .9) +
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "Total Base Compensation to Highest Paid Employees\nby Beginning of Year Balance",
       subtitle  = "Both Axes on Log Scale"
      x = "Beginning of Year Balance",
      y = "Total Compensation")


ggplotly(plt, height = 500, width = 850) %>%
  layout(margin = m)

Ranking of Beginning of Year Balance Compared to Ranking of C-Suite Compensation

plot_ranks("BeginningYearBalanceAmt",
           "total_compensation", data = comp )


Top Employees Compensation Compared to Total Compensation

  • For total employee compensation - CYSalariesCompEmpBnftPaidAmt: Salaries, other compensation, employee benefits (Part IX, column (A), lines 5–10).
  • For top employee compensation - Schedule J, looking at all compensation except deferred
top <- comp_clean %>% 
  group_by(EIN,fiscal_year,organization_name) %>% 
  mutate(not_deferred = TotalCompensationFilingOrgAmt -DeferredCompensationFlngOrgAmt) %>%
  summarize(num_top_employees = n(),
            compensation_top_total = sum(TotalCompensationFilingOrgAmt),
            compensation_top_base = sum(BaseCompensationFilingOrgAmt),
            compensation_top_not_def = sum(not_deferred)) %>%
  ungroup()

emp_comp <- employees %>% 
  left_join(top)

plt <- emp_comp %>%
  ggplot(aes(x=fiscal_year, 
             y = compensation_top_total/CYSalariesCompEmpBnftPaidAmt,
             color = organization_name,
             group= EIN)) +
  geom_point() +
  geom_line()+
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
  theme_bw() +
  labs(y="Fraction of Total Compensation Paid",
       title = "Fraction of Total Compensation Paid to C-Suite Employees",
       x="Fiscal Year") +
  scale_y_continuous(n.breaks = 6)+
    theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 12),
            axis.title = element_text(size = 13, 
                                      face = "bold"))


ggplotly(plt, margin = m, height = 500)
 plt <- emp_comp %>%
   filter(fiscal_year !=2014 & fiscal_year !=2021) %>% 
  ggplot(aes(y=CYSalariesCompEmpBnftPaidAmt/TotalEmployeeCnt, 
             x = compensation_top_base/num_top_employees,
             label=fiscal_year,
             color =organization_name,
             group = EIN)) +
  geom_point() +
   theme_bw() + 
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold")) +
   scale_y_continuous(labels =comma) +
   labs(y = "Average Employee Compensation",
        x = "Average C-Suite Compensation",
        title = "Average C-Suite Compensation vs. Overall Average Employee Compensation") +
   facet_wrap(~fiscal_year)
 
 ggplotly(plt, margin = m,height = 500)
 plt <- emp_comp %>%
   mutate(num_not_top = TotalEmployeeCnt - num_top_employees,
          compensation_not_top = CYSalariesCompEmpBnftPaidAmt - compensation_top_total,
          avg_not_top = compensation_not_top/num_not_top ,
          avg_top = compensation_top_total / num_top_employees) %>%
   filter(fiscal_year !=2014 & fiscal_year !=2021) %>% 
   # something strange with Aspen Santa Fe 
   filter(avg_not_top > 0) %>% 
  ggplot(aes(y=avg_not_top, 
             x =avg_top,
             label=fiscal_year,
             color =organization_name,
             group = EIN)) +
  geom_point() +
   theme_bw() + 
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme(plot.title = element_text(size = 12, 
                                      hjust = .5, face="bold",
                                      margin = margin(5,5,5,5)),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          axis.text.x = element_text(angle = 20, vjust = .6)) +
   scale_y_continuous(labels =comma) +
   labs(y = "Average Employee Compensation",
        x = "Average C-Suite Compensation",
        title = "Average C-Suite Pay versus Average Employee Compensation (Not Including C-Suite)") +
   facet_wrap(~fiscal_year) +
   scale_x_continuous(labels = comma)
 

marg <- list(
    l = 50,
    r = 50,
    b = 250,
    t = 250,
    pad = 0.5
)
 
 ggplotly(plt, margin = marg, height = 500)

Comparison to Financial Data: S&P 500

sp <- read_csv(here('data','SP500.csv')) %>%
  rename_with(tolower) %>%
  mutate(sp500 = as.numeric(sp500)) %>%
  filter(!is.na(sp500)) %>%
  mutate(month = month(date),
         year = year(date)) %>%
  group_by(month,year) %>%
  arrange(month) %>%
 slice_min(n=1,order_by = date) %>%
  ungroup()


sp <- read_csv(here('data','SP500.csv')) %>%
  rename_with(tolower) %>%
  mutate(sp500 = as.numeric(sp500)) %>%
  filter(!is.na(sp500)) %>%
  mutate(month = month(date),
         year = year(date)) %>%
  group_by(month,year) %>%
  summarize(sp500 = mean(sp500, na.rm=TRUE))

endowment_sp <- endowment_data %>%
  mutate(month = month(TaxPeriodEndDt),
         year= year(TaxPeriodEndDt)) %>%
  left_join(sp) 

plt <- endowment_sp %>%
  filter(variable_name %in% c("BeginningYearBalanceAmt")) %>%
  group_by(EIN) %>%
  mutate(m = median(value, na.rm= TRUE)) %>%
  ungroup() %>%
#  group_by(EIN) %>%
  mutate(tile = ntile(m,2),
         tilename = ifelse(tile == 1,
                           "EINs Below the Median",
                           "EINs Above the Median"))%>%
  # only EINS where all observations are NA will be dropped
  filter(!is.na(tilename)) %>%
  mutate(date = as_date(paste0(month, "-", year), format = "%m-%Y")) %>%
  mutate(normalized = value/sp500,
         not = value) %>%
  select(date, normalized, not,variable_name, organization_name, tilename) %>%
  pivot_longer(c(normalized,not)) %>%
  ggplot(aes(x=date, y = value, color = organization_name)) +
  geom_point() +
  geom_line() +
  facet_wrap(~tilename+name, scales="free", ncol = 2)+
  labs(title = "Normalizing by S&P 500") +
   theme_bw() + 
  viridis::scale_color_viridis(discrete=TRUE,
                                 option = "rocket",
                                 end = .9) +
    theme(plot.title = element_text(size = 14, 
                                      hjust = .5, face="bold",),
            plot.subtitle = element_text(hjust = .5, 
                                         face="italic",
                                         size = 14),
            axis.title = element_text(size = 13, 
                                      face = "bold"),
          strip.text = element_text(margin = margin(10,5,5,5,"pt")))

ggplotly(plt, margin = m, height = 500, width = 850)